Confirmatory Factor Analysis and Structural Equation Models

Work in Progress

cfa
sem
measurement
# Package setup: lavaan (CFA/SEM estimation), semPlot (path diagrams),
# lme4 (mixed models), marginaleffects (predictions/plots),
# modelsummary + tinytable/kableExtra (tables), egg (plot arrangement),
# reshape2 (matrix melting for the residual heatmap).
library(lavaan)
library(dplyr)
library(reticulate)
library(marginaleffects)
library(modelsummary)
library(ggplot2)
library(egg)
library(lme4)
library(semPlot)
library(tinytable)
library(kableExtra)
library(reshape2)

# NOTE(review): machine-specific path — this will not resolve on other
# machines; consider sourcing it from an environment variable.
options(rstudio.python.installationPath = "/Users/nathanielforde/mambaforge/envs")
# Render modelsummary tables with tinytable so style_tt() below applies.
options("modelsummary_factory_default" = "tinytable")
options(repr.plot.width=15, repr.plot.height=8)

# Route python chunks through reticulate's knitr engine.
knitr::knit_engines$set(python = reticulate::eng_python)
options(scipen=999)  # suppress scientific notation in printed output
set.seed(100)        # reproducible noise injection in the data-prep chunk
Warning

This is a work in progress. We intend to elaborate on the choices involved in analysing and understanding data elicited through intentional psychometric surveys.

Measurement and Measurement Constructs

Below we load the simulated survey data, inject regional noise into the first life-satisfaction item, and construct sum and mean outcome scores from the three life-satisfaction items.

# Load the survey data and inject Gaussian noise (mean 1, sd 0.3) into
# the first life-satisfaction item for the 'west' region, so regional
# differences exist for the hierarchical models to pick up.
df <- read.csv("sem_data.csv")
inflate <- df$region == "west"
noise <- rnorm(sum(inflate), 1, 0.3)  # generate the noise to add
df$ls_p1[inflate] <- df$ls_p1[inflate] + noise

# Derived outcomes: summed and averaged life-satisfaction scores.
df$ls_sum <- df$ls_p1 + df$ls_p2 + df$ls_p3
df$ls_mean <- rowMeans(df[c("ls_p1", "ls_p2", "ls_p3")])

head(df) |> kable()
ID region gender age se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p2 sup_parents_p3 ls_p1 ls_p2 ls_p3 ls_sum ls_mean
1 west female 13 4.857143 5.571429 4.500000 5.80 5.500000 5.40 6.5 6.5 7.0 7.0 7.0 6.0 6.182676 6.75 5.50 18.43268 6.144225
2 west male 14 4.571429 4.285714 4.666667 5.00 5.500000 4.80 4.5 4.5 5.5 5.0 6.0 4.5 5.372793 5.00 4.50 14.87279 4.957598
10 west female 14 4.142857 6.142857 5.333333 5.20 4.666667 6.00 4.0 4.5 3.5 7.0 7.0 6.5 7.309658 5.50 4.00 16.80966 5.603219
11 west female 14 5.000000 5.428571 4.833333 6.40 5.833333 6.40 7.0 7.0 7.0 7.0 7.0 7.0 5.599369 6.50 6.25 18.34937 6.116456
12 west female 14 5.166667 5.600000 4.800000 5.25 5.400000 5.25 7.0 7.0 7.0 6.5 6.5 7.0 6.701758 6.00 5.75 18.45176 6.150586
14 west male 14 4.857143 4.857143 4.166667 5.20 5.000000 4.20 5.5 6.5 7.0 6.5 6.5 6.5 6.095589 5.50 5.50 17.09559 5.698530
#| code-fold: true
#| code-summary: "Show the code"

# Summary statistics for every column, with three row groups tinted in
# distinct colours to pick out the item blocks (rows 2-14), the
# life-satisfaction items (rows 15-17) and the derived scores (18-19).
skim_tbl <- datasummary_skim(df)
skim_tbl <- style_tt(skim_tbl, i = 15:17, j = 1:1,
                     background = "#20AACC", color = "white", italic = TRUE)
skim_tbl <- style_tt(skim_tbl, i = 18:19, j = 1:1,
                     background = "#2888A0", color = "white", italic = TRUE)
style_tt(skim_tbl, i = 2:14, j = 1:1,
         background = "#17C2AD", color = "white", italic = TRUE)
tinytable_gpph4grqpa0secbjdu9w
Unique Missing Pct. Mean SD Min Median Max Histogram
ID 283 0 187.9 106.3 1.0 201.0 367.0
age 5 0 14.7 0.8 13.0 15.0 17.0
se_acad_p1 32 0 5.2 0.8 3.1 5.1 7.0
se_acad_p2 36 0 5.3 0.7 3.1 5.4 7.0
se_acad_p3 29 0 5.2 0.8 2.8 5.2 7.0
se_social_p1 24 0 5.3 0.8 1.8 5.4 7.0
se_social_p2 27 0 5.5 0.7 2.7 5.5 7.0
se_social_p3 31 0 5.4 0.8 3.0 5.5 7.0
sup_friends_p1 13 0 5.8 1.1 1.0 6.0 7.0
sup_friends_p2 10 0 6.0 0.9 2.5 6.0 7.0
sup_friends_p3 13 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p1 11 0 6.0 1.1 1.0 6.0 7.0
sup_parents_p2 11 0 5.9 1.1 2.0 6.0 7.0
sup_parents_p3 13 0 5.7 1.1 1.0 6.0 7.0
ls_p1 156 0 5.7 1.1 2.0 5.7 8.1
ls_p2 21 0 5.8 0.7 2.5 5.8 7.0
ls_p3 22 0 5.2 0.9 2.0 5.2 7.0
ls_sum 218 0 16.7 2.1 8.7 17.0 20.8
ls_mean 217 0 5.6 0.7 2.9 5.7 6.9
N %
region east 142 50.2
west 141 49.8
gender female 132 46.6
male 151 53.4

Fit Initial Regression Models

# Regression formulas at three levels of item coverage: only the first
# item per construct, the first two items, and all three items.
# Outcomes are either the summed or the averaged life-satisfaction score.
formula_sum_1st <- " ls_sum ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"
formula_mean_1st <- " ls_mean ~ se_acad_p1  + se_social_p1 +  sup_friends_p1  + sup_parents_p1"

formula_sum_12 <- " ls_sum ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"
formula_mean_12 <- " ls_mean ~ se_acad_p1  + se_acad_p2 +  se_social_p1 + se_social_p2 + 
sup_friends_p1 + sup_friends_p2  + sup_parents_p1 + sup_parents_p2"


formula_sum <- " ls_sum ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"
formula_mean <- " ls_mean ~ se_acad_p1 + se_acad_p2 + se_acad_p3 + se_social_p1 +  se_social_p2 + se_social_p3 +  sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + sup_parents_p1 + sup_parents_p2 + sup_parents_p3"

# Fit each specification with OLS against the survey data.
mod_sum <- lm(formula_sum, data = df)
mod_sum_1st <- lm(formula_sum_1st, data = df)
mod_sum_12 <- lm(formula_sum_12, data = df)
mod_mean <- lm(formula_mean, data = df)
mod_mean_1st <- lm(formula_mean_1st, data = df)
mod_mean_12 <- lm(formula_mean_12, data = df)

# Scale a numeric vector onto the [0, 1] interval.
#
# Generalised with an `na.rm` argument (default FALSE preserves the
# original behaviour of propagating NA) and a single range() scan
# instead of separate min()/max() calls.
#
# @param x     A numeric vector.
# @param na.rm Remove NA before computing the range?
# @return A numeric vector the same length as `x`; constant input
#         yields NaN (0/0), as in the original implementation.
min_max_norm <- function(x, na.rm = FALSE) {
    rng <- range(x, na.rm = na.rm)
    (x - rng[1]) / (rng[2] - rng[1])
}

# Min-max normalise the fifteen indicator columns (se_acad_p1 through
# ls_p3, columns 5-19) and re-attach the untransformed outcome scores.
df_norm <- as.data.frame(lapply(df[5:19], min_max_norm))

df_norm$ls_sum <- df$ls_sum
df_norm$ls_mean <- df$ls_mean

# Refit the full models on the normalised predictors for comparison.
mod_sum_norm <- lm(formula_sum, data = df_norm)
mod_mean_norm <- lm(formula_mean, data = df_norm)

# Nested list groups the eight fits by outcome so modelsummary renders
# them under two column-group headers.
models <- list(
    "Outcome: sum_score" = list(
        "model_sum_1st_factors" = mod_sum_1st,
        "model_sum_1st_2nd_factors" = mod_sum_12,
        "model_sum_score" = mod_sum,
        "model_sum_score_norm" = mod_sum_norm
    ),
    "Outcome: mean_score" = list(
        "model_mean_1st_factors" = mod_mean_1st,
        "model_mean_1st_2nd_factors" = mod_mean_12,
        "model_mean_score" = mod_mean,
        "model_mean_score_norm" = mod_mean_norm
    )
)
#| code-fold: true
#| code-summary: "Show the code"

# Side-by-side regression table for all eight fits, with the
# coefficient rows (2-25) tinted to separate them from fit statistics.
reg_tbl <- modelsummary(models, stars = TRUE, shape = "cbind")
style_tt(reg_tbl, i = 2:25, j = 1:1,
         background = "#17C2AD", color = "white", italic = TRUE)
tinytable_at0aqqma8uutip7m3xth
Outcome: sum_score Outcome: mean_score
model_sum_1st_factors model_sum_1st_2nd_factors model_sum_score model_sum_score_norm model_mean_1st_factors model_mean_1st_2nd_factors model_mean_score model_mean_score_norm
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 6.307*** 4.318*** 3.948*** 8.797*** 2.102*** 1.439*** 1.316*** 2.932***
(0.915) (1.020) (1.001) (0.690) (0.305) (0.340) (0.334) (0.230)
se_acad_p1 0.121 -0.098 -0.122 -0.470 0.040 -0.033 -0.041 -0.157
(0.148) (0.186) (0.202) (0.779) (0.049) (0.062) (0.067) (0.260)
se_social_p1 1.107*** 0.663** 0.540* 2.807* 0.369*** 0.221** 0.180* 0.936*
(0.163) (0.212) (0.210) (1.091) (0.054) (0.071) (0.070) (0.364)
sup_friends_p1 0.089 -0.209 -0.251 -1.503 0.030 -0.070 -0.084 -0.501
(0.088) (0.137) (0.158) (0.945) (0.029) (0.046) (0.053) (0.315)
sup_parents_p1 0.567*** 0.332* 0.175 1.053 0.189*** 0.111* 0.058 0.351
(0.100) (0.146) (0.150) (0.901) (0.033) (0.049) (0.050) (0.300)
se_acad_p2 0.360+ 0.352+ 1.357+ 0.120+ 0.117+ 0.452+
(0.204) (0.212) (0.817) (0.068) (0.071) (0.272)
se_social_p2 0.563* 0.364 1.575 0.188* 0.121 0.525
(0.220) (0.230) (0.996) (0.073) (0.077) (0.332)
sup_friends_p2 0.333* 0.331* 1.487* 0.111* 0.110* 0.496*
(0.163) (0.168) (0.755) (0.054) (0.056) (0.252)
sup_parents_p2 0.267+ 0.038 0.189 0.089+ 0.013 0.063
(0.143) (0.151) (0.755) (0.048) (0.050) (0.252)
se_acad_p3 -0.141 -0.588 -0.047 -0.196
(0.183) (0.762) (0.061) (0.254)
se_social_p3 0.385* 1.541* 0.128* 0.514*
(0.168) (0.674) (0.056) (0.225)
sup_friends_p3 0.110 0.657 0.037 0.219
(0.137) (0.820) (0.046) (0.273)
sup_parents_p3 0.492*** 2.953*** 0.164*** 0.984***
(0.132) (0.791) (0.044) (0.264)
Num.Obs. 283 283 283 283 283 283 283 283
R2 0.377 0.420 0.460 0.460 0.377 0.420 0.460 0.460
R2 Adj. 0.368 0.403 0.436 0.436 0.368 0.403 0.436 0.436
AIC 1096.3 1084.4 1071.9 1071.9 474.5 462.6 450.1 450.1
BIC 1118.1 1120.9 1122.9 1122.9 496.3 499.1 501.1 501.1
Log.Lik. -542.137 -532.214 -521.943 -521.943 -231.230 -221.306 -211.035 -211.035
RMSE 1.64 1.59 1.53 1.53 0.55 0.53 0.51 0.51
# Flat (ungrouped) list of the same eight fits for a single combined
# coefficient plot.
models <- list(
    "model_sum_1st_factors" = mod_sum_1st,
    "model_sum_1st_2nd_factors" = mod_sum_12,
    "model_sum_score" = mod_sum,
    "model_sum_score_norm" = mod_sum_norm,
    "model_mean_1st_factors" = mod_mean_1st,
    "model_mean_1st_2nd_factors" = mod_mean_12,
    "model_mean_score" = mod_mean,
    "model_mean_score_norm" = mod_mean_norm
)

# Dotted zero line makes the sign of each estimate easy to read off.
modelplot(models, coef_omit = "Intercept") +
    geom_vline(xintercept = 0, linetype = "dotted", color = "black") +
    ggtitle("Comparing Model Parameter Estimates", "Across Covariates")

Significant Coefficients

# Colour coefficient estimates by significance at the 0.001 level; the
# legend is suppressed on the left panel so it appears only once.
g1 <- modelplot(mod_sum, coef_omit = "Intercept") +
    aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
    scale_color_manual(values = c("grey", "black"), guide = "none")

g2 <- modelplot(mod_mean, coef_omit = "Intercept") +
    aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
    scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1, g2, ncol = 2, nrow = 1)

Aggregate Driver Scores

# Parcel ("aggregate driver") scores: average the three items of each
# construct so it enters the regression as a single column.
df$se_acad_mean <- rowMeans(df[c("se_acad_p1", "se_acad_p2", "se_acad_p3")])
df$se_social_mean <- rowMeans(df[c("se_social_p1", "se_social_p2", "se_social_p3")])
df$sup_friends_mean <- rowMeans(df[c("sup_friends_p1", "sup_friends_p2", "sup_friends_p3")])
df$sup_parents_mean <- rowMeans(df[c("sup_parents_p1", "sup_parents_p2", "sup_parents_p3")])

formula_parcel_mean <- "ls_mean ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"
formula_parcel_sum <- "ls_sum ~ se_acad_mean + se_social_mean + sup_friends_mean + sup_parents_mean"

# Fit both outcomes against the parcel scores.
mod_sum_parcel <- lm(formula_parcel_sum, data = df)
mod_mean_parcel <- lm(formula_parcel_mean, data = df)

models_parcel <- list(
    "model_sum_score" = mod_sum_parcel,
    "model_mean_score" = mod_mean_parcel
)

modelsummary(models_parcel, stars = TRUE)
tinytable_tw8zxvpf6bcuywhdyj4e
model_sum_score model_mean_score
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 4.559*** 1.520***
(0.972) (0.324)
se_acad_mean 0.114 0.038
(0.165) (0.055)
se_social_mean 1.269*** 0.423***
(0.183) (0.061)
sup_friends_mean 0.075 0.025
(0.102) (0.034)
sup_parents_mean 0.721*** 0.240***
(0.104) (0.035)
Num.Obs. 283 283
R2 0.435 0.435
R2 Adj. 0.427 0.427
AIC 1068.9 447.1
BIC 1090.7 468.9
Log.Lik. -528.436 -217.528
RMSE 1.57 0.52
# Same significance-coloured coefficient plots for the parcel models;
# legend suppressed on the left panel so it appears only once.
g1 <- modelplot(mod_sum_parcel, coef_omit = "Intercept") +
    aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
    scale_color_manual(values = c("grey", "black"), guide = "none")

g2 <- modelplot(mod_mean_parcel, coef_omit = "Intercept") +
    aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
    scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1, g2, ncol = 2, nrow = 1)

Hierarchical Models

# Mixed-effects versions of the full models: a random intercept per
# region absorbs regional level differences (e.g. the noise injected
# into ls_p1 for the 'west' region during data prep).
formula_hierarchy_mean <- "ls_mean ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3  + (1 | region)"

formula_hierarchy_sum <- "ls_sum ~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3 + sup_friends_p1 + sup_friends_p2 + sup_friends_p3 + se_acad_p1 + se_acad_p2 + se_acad_p3 +
se_social_p1 + se_social_p2 + se_social_p3 + (1 | region)"

hierarchical_mean_fit <- lmer(formula_hierarchy_mean, data = df, REML = TRUE)
hierarchical_sum_fit <- lmer(formula_hierarchy_sum, data = df, REML = TRUE)

# NOTE(review): lme4 itself reports no p-values, so the p.value mapping
# below presumably comes from modelsummary's lmerMod tidier; the
# "Removed 2 rows" geom_segment warnings suggest some terms lack them —
# confirm the colouring behaves as intended.
g1 <- modelplot(hierarchical_mean_fit, re.form = NA) +
    aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
    scale_color_manual(values = c("grey", "black"), guide = "none")

g2 <- modelplot(hierarchical_sum_fit, re.form = NA) +
    aes(color = ifelse(p.value < 0.001, "Significant", "Not significant")) +
    scale_color_manual(values = c("grey", "black"))

plot <- ggarrange(g1, g2, ncol = 2, nrow = 1)
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).
Removed 2 rows containing missing values or values outside the scale range
(`geom_segment()`).

# Tabulate the two hierarchical fits side by side, tinting the
# coefficient rows (2-25) to separate them from the fit statistics.
hier_models <- list(
    "hierarchical_mean_fit" = hierarchical_mean_fit,
    "hierarchical_sum_fit" = hierarchical_sum_fit
)
hier_tbl <- modelsummary(hier_models, stars = TRUE)
style_tt(hier_tbl, i = 2:25, j = 1:1,
         background = "#17C2AD", color = "white", italic = TRUE)
tinytable_tx274em7aecbrkz6pyi9
hierarchical_mean_fit hierarchical_sum_fit
+ p < 0.1, * p < 0.05, ** p < 0.01, *** p < 0.001
(Intercept) 0.784* 2.352*
(0.377) (1.132)
sup_parents_p1 0.021 0.062
(0.048) (0.145)
sup_parents_p2 0.036 0.108
(0.048) (0.145)
sup_parents_p3 0.179*** 0.537***
(0.042) (0.126)
sup_friends_p1 -0.084+ -0.253+
(0.050) (0.150)
sup_friends_p2 0.115* 0.346*
(0.053) (0.160)
sup_friends_p3 0.045 0.136
(0.043) (0.130)
se_acad_p1 -0.069 -0.207
(0.064) (0.193)
se_acad_p2 0.123+ 0.369+
(0.067) (0.202)
se_acad_p3 0.053 0.160
(0.061) (0.184)
se_social_p1 0.117+ 0.352+
(0.068) (0.203)
se_social_p2 0.170* 0.509*
(0.074) (0.221)
se_social_p3 0.149** 0.447**
(0.054) (0.161)
SD (Intercept region) 0.248 0.744
SD (Observations) 0.498 1.493
Num.Obs. 283 283
R2 Marg. 0.456 0.456
R2 Cond. 0.564 0.564
AIC 486.1 1079.3
BIC 540.8 1134.0
ICC 0.2 0.2
RMSE 0.49 1.46

Marginal Effects

# Counterfactual predictions over a grid of support values, holding the
# other covariates at datagrid() defaults. re.form=NA drops the random
# effects from the hierarchical prediction.
# NOTE(review): `pred` is computed but only `pred1` is displayed below.
pred <- predictions(hierarchical_mean_fit, newdata = datagrid(sup_parents_p3 = 1:10, region=c('east', 'west')), re.form=NA)
pred1 <- predictions(mod_mean, newdata = datagrid(sup_parents_p2 = 1:10))

pred1 |> head() |> kableExtra::kable()
rowid estimate std.error statistic p.value s.value conf.low conf.high se_acad_p1 se_acad_p2 se_acad_p3 se_social_p1 se_social_p2 se_social_p3 sup_friends_p1 sup_friends_p2 sup_friends_p3 sup_parents_p1 sup_parents_p3 sup_parents_p2 ls_mean
1 5.501073 0.2498601 22.01661 0 354.4488 5.011356 5.990790 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 1 6.144225
2 5.513680 0.2000338 27.56375 0 553.1635 5.121621 5.905739 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 2 6.144225
3 5.526287 0.1505320 36.71170 0 977.7205 5.231249 5.821324 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 3 6.144225
4 5.538894 0.1018295 54.39382 0 Inf 5.339312 5.738476 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 4 6.144225
5 5.551501 0.0560496 99.04623 0 Inf 5.441645 5.661356 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 5 6.144225
6 5.564107 0.0312544 178.02653 0 Inf 5.502850 5.625365 5.154955 5.346853 5.211248 5.290047 5.477267 5.440989 5.786219 6.010601 5.991166 5.975265 5.719081 6 6.144225

Regression Marginal Effects

# Counterfactual response plots for the fixed-effects model: shift one
# item across its range while holding all other covariates fixed.
g <- plot_predictions(mod_mean, condition = c("sup_parents_p3"), type = "response") +
    ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 <- plot_predictions(mod_mean, condition = c("sup_friends_p1"), type = "response") +
    ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 <- plot_predictions(mod_mean, condition = c("se_acad_p1"), type = "response") +
    ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g, g1, g2, ncol = 1, nrow = 3)

Hierarchical Marginal Effects

# Same counterfactual plots for the hierarchical model, split by
# region; re.form = NA excludes the random-intercept term from the
# predictions.
g <- plot_predictions(hierarchical_mean_fit, condition = c("sup_parents_p3", "region"), type = "response", re.form = NA) +
    ggtitle("Counterfactual Shift of Outcome: sup_parents_p3", "Holding all else Fixed")

g1 <- plot_predictions(hierarchical_mean_fit, condition = c("sup_friends_p1", "region"), type = "response", re.form = NA) +
    ggtitle("Counterfactual Shift of Outcome: sup_friends_p1", "Holding all else Fixed")

g2 <- plot_predictions(hierarchical_mean_fit, condition = c("se_acad_p1", "region"), type = "response", re.form = NA) +
    ggtitle("Counterfactual Shift of Outcome: se_acad_p1", "Holding all else Fixed")

plot <- ggarrange(g, g1, g2, ncol = 1, nrow = 3)

Confirmatory Factor Analysis

# Confirmatory factor analysis: five latent constructs, each measured
# by three observed indicator items (lavaan model syntax; `=~` reads
# "is measured by").
model_measurement <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3
"


# Fit the measurement model (lavaan defaults: ML estimation) and plot
# everything except the factor-loading (`=~`) coefficients.
fit_mod <- cfa(model_measurement, data = df)
modelplot(fit_mod, coef_omit='=~')

# Global fit measures plus standardised loadings and (co)variances.
summary(fit_mod, fit.measures = TRUE, standardized = TRUE)
lavaan 0.6-18 ended normally after 50 iterations

  Estimator                                         ML
  Optimization method                           NLMINB
  Number of model parameters                        40

  Number of observations                           283

Model Test User Model:
                                                      
  Test statistic                               208.228
  Degrees of freedom                                80
  P-value (Chi-square)                           0.000

Model Test Baseline Model:

  Test statistic                              2596.891
  Degrees of freedom                               105
  P-value                                        0.000

User Model versus Baseline Model:

  Comparative Fit Index (CFI)                    0.949
  Tucker-Lewis Index (TLI)                       0.932

Loglikelihood and Information Criteria:

  Loglikelihood user model (H0)              -4366.931
  Loglikelihood unrestricted model (H1)      -4262.817
                                                      
  Akaike (AIC)                                8813.863
  Bayesian (BIC)                              8959.680
  Sample-size adjusted Bayesian (SABIC)       8832.840

Root Mean Square Error of Approximation:

  RMSEA                                          0.075
  90 Percent confidence interval - lower         0.063
  90 Percent confidence interval - upper         0.088
  P-value H_0: RMSEA <= 0.050                    0.001
  P-value H_0: RMSEA >= 0.080                    0.277

Standardized Root Mean Square Residual:

  SRMR                                           0.055

Parameter Estimates:

  Standard errors                             Standard
  Information                                 Expected
  Information saturated (h1) model          Structured

Latent Variables:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents =~                                                        
    sup_parents_p1    1.000                               0.937    0.875
    sup_parents_p2    1.032    0.055   18.643    0.000    0.967    0.885
    sup_parents_p3    0.995    0.059   16.808    0.000    0.932    0.816
  SUP_Friends =~                                                        
    sup_friends_p1    1.000                               1.020    0.905
    sup_friends_p2    0.794    0.043   18.474    0.000    0.810    0.857
    sup_friends_p3    0.892    0.050   17.738    0.000    0.910    0.831
  SE_Academic =~                                                        
    se_acad_p1        1.000                               0.697    0.880
    se_acad_p2        0.805    0.049   16.284    0.000    0.561    0.819
    se_acad_p3        0.951    0.058   16.495    0.000    0.663    0.828
  SE_Social =~                                                          
    se_social_p1      1.000                               0.641    0.847
    se_social_p2      0.957    0.055   17.297    0.000    0.614    0.880
    se_social_p3      0.923    0.066   13.926    0.000    0.592    0.741
  LS =~                                                                 
    ls_p1             1.000                               0.519    0.487
    ls_p2             0.989    0.137    7.235    0.000    0.513    0.705
    ls_p3             1.194    0.165    7.225    0.000    0.619    0.702

Covariances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
  SUP_Parents ~~                                                        
    SUP_Friends       0.132    0.064    2.070    0.038    0.138    0.138
    SE_Academic       0.219    0.046    4.733    0.000    0.336    0.336
    SE_Social         0.285    0.046    6.248    0.000    0.474    0.474
    LS                0.339    0.057    5.923    0.000    0.698    0.698
  SUP_Friends ~~                                                        
    SE_Academic       0.071    0.047    1.494    0.135    0.100    0.100
    SE_Social         0.195    0.046    4.254    0.000    0.298    0.298
    LS                0.156    0.044    3.505    0.000    0.295    0.295
  SE_Academic ~~                                                        
    SE_Social         0.274    0.036    7.532    0.000    0.614    0.614
    LS                0.178    0.036    4.972    0.000    0.493    0.493
  SE_Social ~~                                                          
    LS                0.263    0.043    6.150    0.000    0.790    0.790

Variances:
                   Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
   .sup_parents_p1    0.270    0.037    7.313    0.000    0.270    0.235
   .sup_parents_p2    0.259    0.038    6.856    0.000    0.259    0.217
   .sup_parents_p3    0.436    0.047    9.201    0.000    0.436    0.334
   .sup_friends_p1    0.229    0.040    5.697    0.000    0.229    0.180
   .sup_friends_p2    0.237    0.030    7.914    0.000    0.237    0.265
   .sup_friends_p3    0.371    0.042    8.807    0.000    0.371    0.309
   .se_acad_p1        0.141    0.022    6.481    0.000    0.141    0.225
   .se_acad_p2        0.154    0.018    8.653    0.000    0.154    0.329
   .se_acad_p3        0.202    0.024    8.405    0.000    0.202    0.315
   .se_social_p1      0.162    0.020    8.003    0.000    0.162    0.282
   .se_social_p2      0.109    0.016    6.757    0.000    0.109    0.225
   .se_social_p3      0.288    0.028   10.138    0.000    0.288    0.452
   .ls_p1             0.866    0.078   11.071    0.000    0.866    0.763
   .ls_p2             0.267    0.030    8.996    0.000    0.267    0.504
   .ls_p3             0.395    0.044    9.044    0.000    0.395    0.507
    SUP_Parents       0.878    0.098    8.940    0.000    1.000    1.000
    SUP_Friends       1.041    0.111    9.397    0.000    1.000    1.000
    SE_Academic       0.485    0.054    8.912    0.000    1.000    1.000
    SE_Social         0.411    0.049    8.472    0.000    1.000    1.000
    LS                0.269    0.068    3.958    0.000    1.000    1.000
# Path diagram of the fitted measurement model with standardised
# estimates as edge labels; intercepts hidden for readability.
semPlot::semPaths(fit_mod, whatLabels = 'std', intercepts = FALSE)

Comparing the empirical and implied variance-covariance matrix

# Heatmap of the standardised residuals between the sample covariance
# matrix and the model-implied covariance matrix; large |values|
# indicate local misfit for that pair of indicators.
heat_df <- data.frame(resid(fit_mod, type = "standardized")$cov)
heat_df <- heat_df |> as.matrix() |> melt()
colnames(heat_df) <- c("x", "y", "value")

heat_df |>
    mutate(value = round(value, 2)) |>
    ggplot(aes(x = x, y = y, fill = value)) +
    geom_tile() +
    geom_text(aes(label = value), color = "black", size = 4) +
    scale_fill_gradient2(
        high = "dodgerblue4",
        mid = "white",
        low = "firebrick2"
    ) +
    theme(axis.text.x = element_text(angle = 45)) +
    ggtitle("Residuals of the Sample Covariances and Model Implied Covariances", "A Visual Check of Model fit")

Structural Equation Models

# Full structural equation model: the measurement model above plus a
# structural regression of latent life satisfaction (LS) on the four
# predictor constructs, and a residual covariance between the two
# self-efficacy factors.
model <- "
# Measurement model
SUP_Parents =~ sup_parents_p1 + sup_parents_p2 + sup_parents_p3
SUP_Friends =~ sup_friends_p1 + sup_friends_p2 + sup_friends_p3
SE_Academic =~ se_acad_p1 + se_acad_p2 + se_acad_p3
SE_Social =~ se_social_p1 + se_social_p2 + se_social_p3
LS  =~ ls_p1 + ls_p2 + ls_p3

# Structural model 
# Regressions
LS ~ SE_Academic + SE_Social + SUP_Parents + SUP_Friends

# Residual covariances
SE_Academic ~~ SE_Social
"

# BUG FIX: the original called cfa(model_measurement, ...) and plotted
# `fit_mod`, so the structural model defined above was never estimated.
# Fit the structural specification with lavaan's sem() and plot the
# resulting fit, omitting the factor-loading (`=~`) coefficients.
fit_mod_sem <- sem(model, data = df)
modelplot(fit_mod_sem, coef_omit = '=~')

```

Citation

BibTeX citation:
@online{forde,
  author = {Nathaniel Forde},
  title = {Confirmatory {Factor} {Analysis} and {Structural} {Equation}
    {Models}},
  langid = {en}
}
For attribution, please cite this work as:
Nathaniel Forde. n.d. “Confirmatory Factor Analysis and Structural Equation Models.”